vmx: Add support for Pause-Loop Exiting
author Keir Fraser <keir.fraser@citrix.com>
Tue, 7 Jul 2009 13:06:35 +0000 (14:06 +0100)
committer Keir Fraser <keir.fraser@citrix.com>
Tue, 7 Jul 2009 13:06:35 +0000 (14:06 +0100)
New NHM processors will support Pause-Loop Exiting by adding 2
VM-execution control fields:
PLE_Gap    - upper bound on the amount of time between two successive
             executions of PAUSE in a loop.
PLE_Window - upper bound on the amount of time a guest is allowed to
             execute in a PAUSE loop

If the time between this execution of PAUSE and the previous one exceeds
PLE_Gap, the processor considers this PAUSE to belong to a new loop.
Otherwise, the processor determines the total execution time of this
loop (since the first PAUSE in this loop), and triggers a VM exit if the
total time exceeds PLE_Window.
* Refer to the SDM, volume 3B, sections 21.6.13 & 22.1.3.

Pause-Loop Exiting can be used to detect Lock-Holder Preemption, where
one VP is scheduled out while holding a spinlock, and other VPs waiting
for the same lock are then scheduled in, only to waste CPU time.

Our tests indicate that most spinlocks are held for less than 2^12
cycles.  Performance tests show that with 2X LP over-commitment we can
get a +2% performance improvement for a kernel build (and even more
gain with more LPs).

Signed-off-by: Zhai Edwin <edwin.zhai@intel.com>
xen/arch/x86/hvm/svm/svm.c
xen/arch/x86/hvm/vmx/vmcs.c
xen/arch/x86/hvm/vmx/vmx.c
xen/include/asm-x86/hvm/vmx/vmcs.h
xen/include/asm-x86/perfc_defn.h

index be78c65555e4ef461c1014a3bae2198029338da1..4cd40fe0b14483c49a4cdf6a4d3456e4b503d8b9 100644 (file)
@@ -1504,8 +1504,9 @@ asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
          * The guest is running a contended spinlock and we've detected it.
          * Do something useful, like reschedule the guest
          */
-       do_sched_op_compat(SCHEDOP_yield, 0);
-       break;
+        perfc_incr(pauseloop_exits);
+        do_sched_op_compat(SCHEDOP_yield, 0);
+        break;
 
     default:
     exit_and_crash:
index cd6e44073da5270694d92744a6a5483a7702aa6f..db461da52ec9bc9cc704cbd05b22cc363d17a8e8 100644 (file)
@@ -44,6 +44,20 @@ boolean_param("vpid", opt_vpid_enabled);
 static int opt_unrestricted_guest_enabled = 1;
 boolean_param("unrestricted_guest", opt_unrestricted_guest_enabled);
 
+/*
+ * These two parameters are used to config the controls for Pause-Loop Exiting:
+ * ple_gap:    upper bound on the amount of time between two successive
+ *             executions of PAUSE in a loop.
+ * ple_window: upper bound on the amount of time a guest is allowed to execute
+ *             in a PAUSE loop.
+ * Time is measured based on a counter that runs at the same rate as the TSC,
+ * refer SDM volume 3b section 21.6.13 & 22.1.3.
+ */
+static unsigned int ple_gap = 41;
+integer_param("ple_gap", ple_gap);
+static unsigned int ple_window = 4096;
+integer_param("ple_window", ple_window);
+
 /* Dynamic (run-time adjusted) execution control flags. */
 u32 vmx_pin_based_exec_control __read_mostly;
 u32 vmx_cpu_based_exec_control __read_mostly;
@@ -140,7 +154,8 @@ static void vmx_init_vmcs_config(void)
         min = 0;
         opt = (SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
                SECONDARY_EXEC_WBINVD_EXITING |
-               SECONDARY_EXEC_ENABLE_EPT);
+               SECONDARY_EXEC_ENABLE_EPT |
+               SECONDARY_EXEC_PAUSE_LOOP_EXITING);
         if ( opt_vpid_enabled )
             opt |= SECONDARY_EXEC_ENABLE_VPID;
         if ( opt_unrestricted_guest_enabled )
@@ -168,6 +183,13 @@ static void vmx_init_vmcs_config(void)
                   SECONDARY_EXEC_UNRESTRICTED_GUEST);
     }
 
+    if ( (_vmx_secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) &&
+          ple_gap == 0 )
+    {
+        printk("Disable Pause-Loop Exiting.\n");
+        _vmx_secondary_exec_control &= ~ SECONDARY_EXEC_PAUSE_LOOP_EXITING;
+    }
+
 #if defined(__i386__)
     /* If we can't virtualise APIC accesses, the TPR shadow is pointless. */
     if ( !(_vmx_secondary_exec_control &
@@ -556,6 +578,12 @@ static int construct_vmcs(struct vcpu *v)
     __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
     __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
 
+    if ( cpu_has_vmx_ple )
+    {
+        __vmwrite(PLE_GAP, ple_gap);
+        __vmwrite(PLE_WINDOW, ple_window);
+    }
+
     if ( cpu_has_vmx_secondary_exec_control )
         __vmwrite(SECONDARY_VM_EXEC_CONTROL,
                   v->arch.hvm_vmx.secondary_exec_control);
index fdfcb8d2e6738e5bcc3e58033f6badbb05e49d01..e568a2b563f0c64e3d1ea8e0defe619aa384ef56 100644 (file)
@@ -2609,13 +2609,16 @@ asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
     }
 
     case EXIT_REASON_MONITOR_TRAP_FLAG:
-    {
         v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
         __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
         if ( v->domain->debugger_attached && v->arch.hvm_vcpu.single_step )
             domain_pause_for_debugger();
         break;
-    }
+
+    case EXIT_REASON_PAUSE_INSTRUCTION:
+        perfc_incr(pauseloop_exits);
+        do_sched_op_compat(SCHEDOP_yield, 0);
+        break;
 
     default:
     exit_and_crash:
index c2b468e66467af1b03dfd6b5a59ab898de0e80d5..1f8e665d194395f53319a00d817c2e996eff10bd 100644 (file)
@@ -171,6 +171,7 @@ extern u32 vmx_vmentry_control;
 #define SECONDARY_EXEC_ENABLE_VPID              0x00000020
 #define SECONDARY_EXEC_WBINVD_EXITING           0x00000040
 #define SECONDARY_EXEC_UNRESTRICTED_GUEST       0x00000080
+#define SECONDARY_EXEC_PAUSE_LOOP_EXITING       0x00000400
 extern u32 vmx_secondary_exec_control;
 
 extern bool_t cpu_has_vmx_ins_outs_instr_info;
@@ -200,6 +201,8 @@ extern bool_t cpu_has_vmx_ins_outs_instr_info;
 #define vmx_unrestricted_guest(v)               \
     ((v)->arch.hvm_vmx.secondary_exec_control & \
      SECONDARY_EXEC_UNRESTRICTED_GUEST)
+#define cpu_has_vmx_ple \
+    (vmx_secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
 
 /* GUEST_INTERRUPTIBILITY_INFO flags. */
 #define VMX_INTR_SHADOW_STI             0x00000001
@@ -279,6 +282,8 @@ enum vmcs_field {
     VM_ENTRY_INSTRUCTION_LEN        = 0x0000401a,
     TPR_THRESHOLD                   = 0x0000401c,
     SECONDARY_VM_EXEC_CONTROL       = 0x0000401e,
+    PLE_GAP                         = 0x00004020,
+    PLE_WINDOW                      = 0x00004022,
     VM_INSTRUCTION_ERROR            = 0x00004400,
     VM_EXIT_REASON                  = 0x00004402,
     VM_EXIT_INTR_INFO               = 0x00004404,
index 9a2697c69e5ed876472bfd2eaea8eb320955a7e9..274533e4f32f380581932940c319dbce50dfecee 100644 (file)
@@ -130,4 +130,6 @@ PERFCOUNTER(mshv_wrmsr_eoi,             "MS Hv wrmsr eoi")
 PERFCOUNTER(realmode_emulations, "realmode instructions emulated")
 PERFCOUNTER(realmode_exits,      "vmexits from realmode")
 
+PERFCOUNTER(pauseloop_exits, "vmexits from Pause-Loop Detection")
+
 /*#endif*/ /* __XEN_PERFC_DEFN_H__ */